import requests
import json
import re
import openpyxl

# Browser-like User-Agent passed as the request headers further down.
# The value deliberately keeps the trailing space of the original string.
_USER_AGENT = (
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
    "AppleWebKit/537.36 (KHTML, like Gecko) "
    "Chrome/108.0.0.0 Safari/537.36 "
)
headers = {"User-Agent": _USER_AGENT}

# Create the workbook that receives the scraped data and write the column
# titles into row 1 (columns A..R) of its active sheet.
wb = openpyxl.Workbook()
ws = wb.active
column_titles = (
    'id',
    '中文名',
    '英文名',
    '缩写',
    '创建年份',
    '总部所在城市',
    '总部所在国家',
    '官方语言',
    '官方网址',
    '地址',
    '历史',
    '地区',
    '会议',
    'subordinate',
    '成员',
    '组织类型',
    '组织区域类型',
    '组织主题分类',
)
for column_index, title in enumerate(column_titles, start=1):
    ws.cell(row=1, column=column_index, value=title)
w = 0  # number of data rows written so far; the paging loop below advances it

# Search endpoint and per-request timeout; both are identical for every page.
SEARCH_URL = "https://www.crggcn.com/admin/api/organ/search"
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests can wait forever

# JSON keys of one organisation record, in spreadsheet column order (A..R).
# NOTE(review): 'religion' lands in the column titled '地区' -- presumably the
# API's own spelling of "region"; confirm against the service.
FIELD_KEYS = (
    'id',
    'cnName',
    'elName',
    'abbreviation',
    'setupyear',
    'city',
    'state',
    'language',
    'url',
    'site',
    'historical',
    'religion',
    'conference',
    'subordinate',
    'member',
    'propertyClassify',
    'areaClassify',
    'themeClassify',
)

# Outer loop pages through the search results (pages 1..25); the inner loop
# writes one worksheet row per record returned on the page.
for page in range(1, 26):
    param = {  # query parameters; only pageNum changes between requests
        'type': '1',
        'cnName': '',
        'elName': '',
        'abbreviation': '',
        'setupyear': '',
        'city': '',
        'state': '',
        'member': '',
        'pageNum': f"{page}",
        'pageSize': '10',
        'themeClassify': '04',
        'areaClassify': '',
    }
    res = requests.post(SEARCH_URL, params=param, headers=headers,
                        timeout=REQUEST_TIMEOUT)
    # Stop with a clear HTTP error instead of a confusing JSON/KeyError later.
    res.raise_for_status()
    dic = json.loads(res.text)
    rows = dic['pageOrgan']['rows']  # the records are in the 'rows' list under 'pageOrgan'
    for each in rows:
        # Strict indexing on purpose: a record missing a field raises KeyError.
        data = [each[key] for key in FIELD_KEYS]
        print(data)
        # Row 1 holds the column titles, so record number w goes to row w + 2.
        for column, value in enumerate(data, start=1):
            ws.cell(w + 2, column, value)
        w += 1

    # Written after every page, so the file on disk holds all pages fetched so far.
    wb.save('IOs.xlsx')
